{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "EmailSpam",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "uaWxiQkShfNA"
      },
      "source": [
        "import pandas as pd\r\n",
        "from sklearn.feature_extraction.text import CountVectorizer\r\n",
        "from sklearn.naive_bayes import MultinomialNB, GaussianNB\r\n",
        "from sklearn import svm\r\n",
        "from sklearn.model_selection import GridSearchCV\r\n",
        "from sklearn.externals import joblib"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "itNUZqq6h2nf",
        "outputId": "2004652d-9423-49b1-efdf-a70320571e14"
      },
      "source": [
        "dataframe = pd.read_csv(\"spam.csv\")\r\n",
        "print(dataframe.describe())"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "       Label               EmailText\n",
            "count   5572                    5572\n",
            "unique     2                    5169\n",
            "top      ham  Sorry, I'll call later\n",
            "freq    4825                      30\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "h10NUQBNh_O1"
      },
      "source": [
        "x = dataframe[\"EmailText\"]\r\n",
        "y = dataframe[\"Label\"]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "kUzSKCRMiHK8"
      },
      "source": [
        "x_train,y_train = x[0:4457],y[0:4457]\r\n",
        "x_test,y_test = x[4457:],y[4457:]"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9eSOLPsbiIC0",
        "outputId": "a3948b6d-4a28-4c1b-ecbc-056185ee5567"
      },
      "source": [
        "cv = CountVectorizer()  \r\n",
        "features = cv.fit_transform(x_train)\r\n",
        "\r\n",
        "\r\n",
        "tuned_parameters = {'kernel': ['rbf','linear'], 'gamma': [1e-3, 1e-4],\r\n",
        "                     'C': [1, 10, 100, 1000]}\r\n",
        "\r\n",
        "model = GridSearchCV(svm.SVC(), tuned_parameters)\r\n",
        "\r\n",
        "model.fit(features,y_train)\r\n",
        "\r\n",
        "\r\n",
        "print(model.best_params_)\r\n",
        "\r\n",
        "print(model.score(cv.transform(x_test),y_test))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "{'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}\n",
            "0.9874439461883409\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "5G-vTlq5jJOm",
        "outputId": "226b896c-bbb4-4904-8ab6-c9f3d433e86c"
      },
      "source": [
        "joblib.dump(model.best_estimator_, 'filename.pkl', compress = 1)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "['filename.pkl']"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 104
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "c-ZN9AsVwXCZ",
        "outputId": "f5b81de7-4c50-4b8a-c784-ddedc7a8101b"
      },
      "source": [
        "print(x)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0       Go until jurong point, crazy.. Available only ...\n",
            "1                           Ok lar... Joking wif u oni...\n",
            "2       Free entry in 2 a wkly comp to win FA Cup fina...\n",
            "3       U dun say so early hor... U c already then say...\n",
            "4       Nah I don't think he goes to usf, he lives aro...\n",
            "                              ...                        \n",
            "5567    This is the 2nd time we have tried 2 contact u...\n",
            "5568               Will Ã_ b going to esplanade fr home?\n",
            "5569    Pity, * was in mood for that. So...any other s...\n",
            "5570    The guy did some bitching but I acted like i'd...\n",
            "5571                           Rofl. Its true to its name\n",
            "Name: EmailText, Length: 5572, dtype: object\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_DCmSajzwYIk",
        "outputId": "2598948f-3f7f-4e8d-fcc4-5c28ddd72fe4"
      },
      "source": [
        "print(y)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "0        ham\n",
            "1        ham\n",
            "2       spam\n",
            "3        ham\n",
            "4        ham\n",
            "        ... \n",
            "5567    spam\n",
            "5568     ham\n",
            "5569     ham\n",
            "5570     ham\n",
            "5571     ham\n",
            "Name: Label, Length: 5572, dtype: object\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gp-pgPMZwbTv",
        "outputId": "6ac322bf-d9f8-45dc-a2fd-5a374705ad5d"
      },
      "source": [
        "y.count()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "5572"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 107
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CBowi4wuwhBq",
        "outputId": "2fbd015f-fa87-4f75-9ee9-503f99529d3c"
      },
      "source": [
        "y.unique()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array(['ham', 'spam'], dtype=object)"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 108
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Y4kZFO3Bwvf7",
        "outputId": "0774b301-824f-41d4-b8fa-b1584f4ad550"
      },
      "source": [
        "dataframe['Label'].value_counts()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "ham     4825\n",
              "spam     747\n",
              "Name: Label, dtype: int64"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 109
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 302
        },
        "id": "kRkpulHD0V60",
        "outputId": "940c35bf-c6c5-4700-edf0-7c7a2f13cf67"
      },
      "source": [
        "dataframe['Label'].value_counts().plot(kind='bar')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "<matplotlib.axes._subplots.AxesSubplot at 0x7f78a1e5c400>"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 110
        },
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAELCAYAAAA1AlaNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAPtElEQVR4nO3df6xkZX3H8fdHFvxRqyxypWQXXYybNKhU8RZo9A8D6bJC0yWpUkxTN3aT/Yca25ooNhoiSAJtItVGTbdCulAVidWAiuIGpT/SouyK5aeEW35k2YK7uAtqjdTFb/+Y5+K43Mu9C/fObOd5v5LJnPM9z8x8T5j9zOHMM+emqpAk9eF5425AkjQ6hr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcWFfpJHkhye5LvJdneakcl2Zbk3na/stWT5ONJZpLcluSkoefZ2Mbfm2Tj8uySJGk+Wcw8/SQPANNV9ehQ7a+AvVV1SZLzgZVV9f4kZwLvBs4ETgE+VlWnJDkK2A5MAwXsAN5YVfvme92jjz661qxZ86x3TpJ6tGPHjkeramqubSuew/NuAN7SlrcCNwHvb/Ura/BpcnOSI5Mc28Zuq6q9AEm2AeuBz833AmvWrGH79u3PoUVJ6k+SB+fbtthz+gV8I8mOJJtb7ZiqergtPwIc05ZXATuHHvtQq81XlySNyGKP9N9cVbuSvBzYluT7wxurqpIsyfUc2ofKZoBXvOIVS/GUkqRmUUf6VbWr3e8GvgScDPygnbah3e9uw3cBxw09fHWrzVc/8LW2VNV0VU1PTc15SkqS9CwtGPpJfi3Jr88uA+uAO4DrgNkZOBuBa9vydcA72yyeU4HH22mgG4B1SVa2mT7rWk2SNCKLOb1zDPClJLPjP1tVX09yC3BNkk3Ag8A5bfz1DGbuzAA/Bd4FUFV7k1wE3NLGXTj7pa4kaTQWNWVzXKanp8vZO5J0cJLsqKrpubb5i1xJ6oihL0kdeS4/zlKz5vyvjruFifLAJWeNuwVpYnmkL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRxYd+kkOS3Jrkq+09eOTfDvJTJLPJzmi1Z/f1mfa9jVDz/GBVr8nyRlLvTOSpGd2MEf67wHuHlq/FLisql4N7AM2tfomYF+rX9bGkeQE4FzgNcB64JNJDntu7UuSDsaiQj/JauAs4NNtPcBpwBfakK3A2W15Q1unbT+9jd8AXF1VT1TV/cAMcPJS7IQkaXEWe6T/N8D7gF+09ZcBj1XV/rb+ELCqLa8CdgK07Y+38U/V53iMJGkEFgz9JL8H7K6qHSPohySbk2xPsn3Pnj2jeElJ6sZijvTfBPx+kgeAqxmc1vkYcGSSFW3MamBXW94FHAfQtr8U+OFwfY7HPKWqtlTVdFVNT01NHfQOSZLmt2DoV9UHqmp1Va1h8EXsN6vqj4BvAW9rwzYC17bl69o6bfs3q6pa/dw2u+d4YC3wnSXbE0nSglYsPGRe7weuTvIR4Fbg8la/HLgqyQywl8EHBVV1Z5JrgLuA/cB5VfXkc3h9SdJBOqjQr6qbgJva8n3MMfumqn4GvH2ex18MXHywTUqSloa/yJWkjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUkQVDP8kLknwnyX8muTPJh1v9+CTfTjKT5PNJjmj157f1mbZ9zdBzfaDV70lyxnLtlCRpbos50n8COK2qfgt4PbA+yanApcBlVfVqYB+wqY3fBOxr9cvaOJKcAJwLvAZYD3wyyWFLuTOSpGe2YOjXwE/a6uHtVsBpwBdafStwdlve0NZp209Pkla/uqqeqKr7gRng5CXZC0nSoizqnH6Sw5J8D9gNbAP+C3isqva3IQ8Bq9ryKmAnQNv+OPCy4focj5EkjcCiQr+qnqyq1wOrGRyd/+ZyNZRkc5LtSbbv2bNnuV5Gkrp0ULN3quox4FvA7wBHJlnRNq0GdrXlXcBxAG37S4EfDtfneMzwa2ypqumqmp6amjqY9iRJC1jM7J2pJEe25RcCvwvczSD839aGbQSubcvXtXXa9m9WVbX6uW12z/HAWuA7S7UjkqSFrVh4CMcCW9tMm+cB11TVV5LcBVyd5CPArcDlbfzlwFVJZoC9DGbsUFV3JrkGuAvYD5xXVU8u7e5Ikp7JgqFfVbcBb5ijfh9zzL6pqp8Bb5/nuS4GLj74NiVJS8Ff5EpSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqyIKhn+S4JN9KcleSO5O8p9WPSrItyb3tfmWrJ8nHk8wkuS3JSUPPtbGNvzfJxuXbLUnSXBZzpL8feG9VnQCcCpyX5ATgfODGqloL3NjWAd4KrG23zcCnYPAhAVwAnAKcDFww+0EhSRqNBUO/qh6uqu+25R8DdwOrgA3A1jZsK3B2W94AXFkDNwNHJjkWOAPYVlV7q2ofsA1Yv6R7I0l6Rgd1Tj/JGuANwLeBY6rq4bbpEeCYtrwK2Dn0sIdabb66JGlEFh36SV4M/BPwZ1X1o+FtVVVALUVDSTYn2Z5k+549e5biKSVJzaJCP8nhDAL/M1X1xVb+QTttQ7vf3eq7gOOGHr661ear/4qq2lJV01U1PTU1dTD7IklawGJm7wS4HLi7qj46tOk6YHYGzkbg2qH6O9ssnlOBx9tpoBuAdUlWti9w17WaJGlEVixizJuAPwZuT/K9VvtL4BLgmiSbgAeBc9q264EzgRngp8C7AKpqb5KLgFvauAurau+S7IUkaVEWDP2q+jcg82w+fY7xBZw3z3NdAVxxMA1KkpaOv8iVpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1JEFQz/JFUl2J7ljqHZUkm1J7m33K1s9ST6eZCbJbUlOGnrMxjb+3iQbl2d3JEnPZDFH+v8ArD+gdj5wY1WtBW5s6wBvBda222bgUzD4kAAuAE4BTgYumP2gkCSNzoKhX1X/Auw9oLwB2NqWtwJnD9WvrIGbgSOTHAucAWyrqr1VtQ/YxtM/SCRJy+zZntM/pqoebsuPAMe05VXAzqFxD7XafHVJ0gg95y9yq6qAWoJeAEiyOcn2JNv37NmzVE8rSeLZh/4P2mkb2v3uVt8FHDc0bnWrzVd/mqraUlXTVTU9NTX1LNuTJM3l2Yb+dcDsDJyNwLVD9Xe2WTynAo+300A3AOuSrGxf4K5rNUnSCK1YaECSzwFvAY5O8hCDWTiXANck2QQ8CJzThl8PnAnMAD8F3gVQVXuTXATc0sZdWFUHfjksSVpmC4Z+Vb1jnk2nzzG2gPPmeZ4rgCsOqjtJ0pLyF7mS1BFDX5I6YuhLUkcMfUnqiKEvSR1ZcPaOpP/f1pz/1XG3MDEeuOSscbfwnHmkL0kdMfQlqSOGviR1xNCXpI4Y+pLUEUNfkjpi6EtSRwx9SeqIoS9JHTH0Jakjhr4kdcTQl6SOGPqS1BFDX5I6YuhLUkcMfUnqiKEvSR0x9CWpI4a+JHXE0Jekjhj6ktQRQ1+SOmLoS1JHDH1J6oihL0kdMfQlqSOGviR1xNCXpI6MPPSTrE9yT5KZJOeP+vUlqWcjDf0khwGfAN4KnAC8I8kJo+xBkno26iP9k4GZqrqvqv4XuBrYMOIeJKlbow79VcDOofWHWk2SNAIrxt3AgZJsBja31Z8kuWec/UyYo4FHx93EQnLpuDvQGPjeXFqvnG/DqEN/F3Dc0PrqVntKVW0BtoyyqV4k2V5V0+PuQzqQ783RGfXpnVuAtUmOT3IEcC5w3Yh7kKRujfRIv6r2J/lT4AbgMOCKqrpzlD1IUs9Gfk6/qq4Hrh/16wrwtJkOXb43RyRVNe4eJEkj4mUYJKkjhr4kdcTQl6SOHHI/ztLSS3IisIah/95V9cWxNSTx1LW4zuLp782PjqunHhj6Ey7JFcCJwJ3AL1q5AENf4/Zl4GfA7fzyvallZuhPvlOryiuZ6lC0uqpOHHcTvfGc/uT7Dy9frUPU15KsG3cTvfFIf/JdySD4HwGeAAKUR1g6BNwMfCnJ84Cf88v35kvG29Zk88dZEy7JDPAXHHDetKoeHFtTEpDkfgZ/T+P2MohGxiP9ybenqryonQ5FO4E7DPzRMvQn361JPstgpsQTs0WnbOoQcB9wU5Kv8avvTadsLiNDf/K9kME/qOEvzJyyqUPB/e12RLtpBDynL0kd8Uh/wiV5AbAJeA3wgtl6Vf3J2JqSgCRTwPt4+nvztLE11QHn6U++q4DfAM4A/pnBn6j88Vg7kgY+A3wfOB74MPAAg7+up2Xk6Z0Jl+TWqnpDktuq6sQkhwP/WlWnjrs39S3Jjqp64+x7s9VuqarfHndvk8zTO5Pv5+3+sSSvBR4BXj7GfqRZs+/Nh5OcBfw3cNQY++mCoT/5tiRZCXyQwR+hfzHwofG2JAHwkSQvBd4L/C3wEuDPx9vS5PP0zoRL8nzgDxhcvvbwVq6qunBsTUkaG7/InXzXMvip+37gJ+32P2PtSAKSvCrJl5M8mmR3kmuTvGrcfU06j/QnXJI7quq14+5DOlCSm4FPAJ9rpXOBd1fVKePravJ5pD/5/j3J68bdhDSHF1XVVVW1v93+kaH5+loeHulPqCS3M7jcwgpgLYPrnHhpZR0yklwK7AOuZvBe/UNgJfDXAFW1d3zdTS5Df0IleeUzbffSyhq3dmnlWbNBlNn1qvL8/jIw9CWNRZJzgK9X1Y+SfAg4Cbioqr475tYmmuf0JY3LB1vgvxk4Dfg08Kkx9zTxDH1J4/Jkuz8L+Puq+ipeYnnZGfqSxmVXkr9j8AXu9e2HhGbSMvOcvqSxSPIiYD2Dv5F7b5JjgddV1TfG3NpEM/QlqSP+r5QkdcTQl6SOGPqS1BFDX5I6YuhLUkf+D3zEF9VHN8EeAAAAAElFTkSuQmCC\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "j60KcUkEAjyj",
        "outputId": "198fc6e3-fe59-4fed-c8a4-4408dce18375"
      },
      "source": [
        "loaded_model = joblib.load('/content/filename.pkl')\r\n",
        "a = input('Enter the text:- ')\r\n",
        "result = model.predict(cv.transform([a]))\r\n",
        "print(result[0])"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Enter the text:- Urgent UR awarded a complimentary trip to EuroDisinc Trav, Aco&Entry41 Or ÃƒÂ¥Ã‚Â£1000. To claim txt DIS to 87121 18+6*ÃƒÂ¥Ã‚Â£1.50(moreFrmMob. ShrAcomOrSglSuplt)10, LS1 3AJ\n",
            "spam\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}